
/********************************
Impact of Social Networks on EITC Claiming Behavior

Data prep file

Prepared by Riley Wilson

This file gives code to take the raw SCI data and construct the EITC Network Exposure measures

********************************/
/*First, you must set the three directory globals below to the folders where 
(1) the data is located
(2) the output should be placed
(3) the proprietary SCI data is located*/

// Start from an empty dataset and switch off paging of the results window.
clear
set more off
// data: folder the script changes into before every merge and before saving the .dta outputs.
global data "C:\Users\rwilson9\Box\Research\eitc_networks\submission\restat\final_submission\data"
// output: results folder; it is not referenced again in the visible part of this file.
global output "C:\Users\rwilson9\Box\Research\eitc_networks\submission\restat\final_submission\output"
// fbdata: folder holding the proprietary SCI file County_County.csv.
global fbdata "C:\Users\rwilson9\Box\Research\FB_socialnetwork_data"

/****************************
Construct the county level EITC exposure measures
//you will merge this to cty_eitcnetwork1999_2013_nosci on county and year
****************************/
/*********************
First, Read in FB data, that will then be matched to State EITC policies
*********************/
// ---- Load the county-pair SCI file ----
cd $fbdata
insheet using County_County.csv, clear

// Rescale sci so that its smallest non-zero value (.0025) becomes one. The
// result is read as a number of friendship links, although it may only be
// proportional to the true number of links.
generate fb_links = sci*400

// ---- Friend county: attach the centroid and keep it as f_lat / f_lon ----
cd $data
rename friend_county ctyfips
// keep(master match) discards centroid records that have no SCI pair
merge m:1 ctyfips using county_pop_centroids2010, keep(master match) nogenerate
rename latitude f_lat
rename longitude f_lon
drop population
rename ctyfips f_ctyfips

// ---- Own county: attach centroid and population ----
rename own_county ctyfips
merge m:1 ctyfips using county_pop_centroids2010, keep(master match) nogenerate

// State FIPS is the county FIPS with its last three digits removed
generate f_stfips = floor(f_ctyfips/1000)
generate stfips = floor(ctyfips/1000)

// Distance in miles between the own-county and friend-county centroids
geodist latitude longitude f_lat f_lon, gen(dist) miles

// ---- Flag county pairs that lie on the same interstate ----
// Using-only pairs (independent VA cities) are discarded; pairs missing from
// the interstate file are coded as not sharing an interstate.
merge 1:1 ctyfips f_ctyfips using cty_sameinterstate, keep(master match)
replace same_interstate = 0 if _merge == 1
drop _merge

/*********************
Merge on the percent of the population in EITC range in 2000 for friend counties
*********************/
// Attach the year-2000 EITC-range file to each pair, keyed on the friend county.
// The original author notes that every observation matches.
cd $data
merge m:1 f_ctyfips using cty_eitcrange2000, nogenerate

// ---- State EITC rates of the FRIEND's state ----
// The rates file is keyed on stfips, so the friend's state borrows that name
// for the merge while the own-county state is parked in o_stfips.
rename stfips o_stfips
rename f_stfips stfips
merge m:1 stfips using state_eitcrateswide1985_2018, nogenerate
// NOTE(review): using-only states (if any) are not dropped here - confirm none exist.
rename stfips f_stfips
rename o_stfips stfips

// anyYYYY = 1 when yrYYYY is not system-missing (presumably: the friend's state
// has a state EITC in that year - confirm against the rates file), 0 otherwise.
forvalues yr = 1985/2018 {
	generate any`yr' = (yr`yr' != .)
}

// ---- Restrict to out-of-state friend counties ----
drop if stfips == f_stfips
rename population pop_2010

// Total out-of-state links of each own county: the denominator of the link shares.
// total() is the documented name of the egen function formerly called sum().
bysort ctyfips: egen tot_ostatef = total(fb_links)
//cd $data
//save sci_distpred, replace

// Share of the own county's out-of-state links that go to this friend county.
// fb_links is written out in full: the previous spelling (fb_link) resolved only
// through variable abbreviation, which fails under "set varabbrev off" and turns
// ambiguous as soon as another variable starting with fb_link exists.
generate double fb_share = fb_links/tot_ostatef

// Distance was generated in miles; rescale it to thousands of miles.
replace dist = dist/1000
generate dist_same_interstate = dist*same_interstate

// ---- Predict links from geography (distance, shared interstate, interaction) ----
// Own-county fixed effects are absorbed and saved so that predict, xbd can
// return the linear prediction plus the fixed effect.
reghdfe fb_share dist same_interstate dist_same_interstate if stfips ~= f_stfips, absorb(ctyfips, savefe) vce(cluster ctyfips)
predict predfb_share, xbd
reghdfe fb_links dist same_interstate dist_same_interstate if stfips ~= f_stfips, absorb(ctyfips, savefe) vce(cluster ctyfips)
predict predfb_links, xbd

// Predicted links renormalised to a within-county share.
// NOTE(review): predfb_share1 is not referenced again in the visible part of this
// file; the later exposure measures use predfb_share - confirm that is intended.
bysort ctyfips: egen predtot_ostatef = total(predfb_links)
generate predfb_share1 = predfb_links/predtot_ostatef


// Build, for every policy year, the pair-level pieces of the exposure measures.
// Each piece is the friend state's EITC rate (yrYYYY) or EITC indicator (anyYYYY)
// weighted by actual or predicted links/shares; they are summed over friend
// counties afterwards.
forvalues y = 1985/2018 {
	// A missing rate means no state EITC: treat it as a rate of zero
	replace yr`y' = 0 if yr`y' == .

	// Keep a copy of the raw rate for the predicted-share version before
	// yrYYYY itself is overwritten with its share-weighted value
	gen predyr`y' = yr`y'
	gen steitc_fblinks`y' = (yr`y'/100)*fb_links
	replace yr`y' = yr`y'*fb_share
	replace predyr`y' = predyr`y'*predfb_share

	// Link-weighted versions of the raw 0/1 indicator
	gen fblinks_any`y' = any`y'*fb_links
	gen predfblinks_any`y' = any`y'*predfb_links
	// Copy of the raw indicator, taken before anyYYYY is share-weighted below
	gen predany`y' = any`y'
	gen fblinks_any_eitcrange`y' = any`y'*fb_links*pct_eitcrange2000
	gen fblinks_any_eitcrate`y' = any`y'*fb_links*eitc_rate1999
	gen fblinks_any_bunch`y' = any`y'*fb_links*bunch515_2000

	replace any`y' = any`y'*fb_share
	// Fix: weight the saved raw copy (predanyYYYY), mirroring predyrYYYY above.
	// The previous code multiplied anyYYYY here, which by this point already
	// contains fb_share, so the result was any*fb_share*predfb_share.
	replace predany`y' = predany`y'*predfb_share

}
//Being on the same interstate has a big effect, as does distance. Use these to "instrument"

// ---- Sum the pair-level pieces over friend counties: one row per own county ----
collapse (sum) yr* any* fblinks_any* predyr* predany* predfblinks_any* steitc_fblinks* , ///
	by(ctyfips pop_2010 stfips tot_ostatef) fast

// NOTE(review): only these two 2018 columns are removed. The remaining 2018
// columns are still in memory at the reshape below, so year 2018 stays in the
// long file with these two measures missing - confirm that this is intended.
drop yr2018 any2018

// ---- Wide to long: one row per county and year ----
reshape long yr any fblinks_any predyr predany predfblinks_any ///
	fblinks_any_eitcrange fblinks_any_eitcrate fblinks_any_bunch steitc_fblinks , ///
	i(ctyfips) j(year)

// Descriptive names for the share-weighted measures
rename yr steitc_pct
rename predyr predsteitc_pct
rename any pctany_steitc
rename predany predpctany_steitc

// ---- Exposure per 2010 resident ----
// Links to friends in states with a state EITC
generate eitcexp_fblinkspp = (fblinks_any/pop_2010)
// Same, with links scaled by the state EITC rate (percent of the federal credit)
generate steitcexp_fblinkpctpp = (steitc_fblinks/pop_2010)
// Same as the first measure, using links predicted from geography
generate predeitcexp_fblinkspp = (predfblinks_any/pop_2010)
// Links weighted by the friend county's share of people with income below 40
generate eitcexp_fblinkseitcrangepp = (fblinks_any_eitcrange/pop_2010)
// Links weighted by the friend county's share of people claiming the EITC in 1999
generate eitcexp_fblinkseitcratepp = (fblinks_any_eitcrate/pop_2010)
// Links weighted by the friend county's share of 2000 EITC claimants between 5-15K
generate eitcexp_fblinksbunchratepp = (fblinks_any_bunch/pop_2010)

// Shrink storage types, then write the county-by-year file
compress
cd $data
save cty_eitcexposure, replace


/****************************
Create EITC Exposure for the CZ*State level for the ACS data
//you will merge czst_eitcexposure.dta to hh_selfemp_network05_17_nosci.dta using year stfips czone
****************************/
// ---- Aggregate the county-by-year exposure file to commuting zone x state ----
cd $data
use cty_eitcexposure, clear

// Crosswalk key: a copy of ctyfips, except that 12086 is recoded to 12025
// (presumably the code the crosswalk uses for that county - confirm).
generate cty_fips = ctyfips
replace cty_fips = 12025 if ctyfips == 12086

// Only matched counties are kept. Per the original notes, the discarded
// crosswalk-only records are cities in VA and counties in AK that are not in
// the FB data, and the discarded data-only records are counties in AK and
// Broomfield CO (created late).
merge m:1 cty_fips using cw_cty_czone, keep(match) nogenerate

// Sum links and 2010 population within each state x commuting zone x year cell
collapse (sum) fblinks_any predfblinks_any pop_2010, by(stfips czone year)

// Links to friends in states with a state EITC, per 2010 resident
generate eitcexp_fblinkspp = (fblinks_any/pop_2010)
// Same measure built from the predicted links
generate predeitcexp_fblinkspp = (predfblinks_any/pop_2010)

compress
save czst_eitcexposure, replace

/*****************************
Create EITC Exposure for the DMA level for the Google Trends data
//you will merge dma_fbnetwork.dta 1:m  to dma_eitcgoogletrend_network04_17_nosci using dma_id year
*****************************/
// ---- Load the county-pair SCI file again ----
cd $fbdata
insheet using County_County.csv, clear

// Rescale sci so that its smallest non-zero value (.0025) becomes one. The
// result is read as a number of friendship links, although it may only be
// proportional to the true number of links.
generate fb_links = sci*400

rename friend_county friend_ctyfips
rename own_county ctyfips

// ---- Attach the DMA of the own county ----
cd $data
// Only matched counties survive: data-only records are Alaska; crosswalk-only
// records are independent cities in Virginia (folded into the surrounding
// county in the FB data) and Alaska.
merge m:1 ctyfips using cty_dma_xwalk2014, keep(match) nogenerate
// Crosswalk-only DMAs (in AK) are discarded
merge m:1 dma_id using dma_stfips_xwalk, keep(master match) nogenerate

// ---- Keep only friends outside every state listed for the DMA ----
// stfips1-stfips5 are presumably the states a DMA spans - confirm against
// dma_stfips_xwalk.
generate friend_stfips = floor(friend_ctyfips/1000)
forvalues i = 1/5 {
	drop if friend_stfips == stfips`i'
}
// No remaining friend county is in a state listed for the own county's DMA

// ---- County population, counted once per county ----
// Centroid-only records (Alaska, Virginia cities, Puerto Rico) are discarded
merge m:1 ctyfips using county_pop_centroids2010, keep(master match) nogenerate
rename population pop_2010
// Population repeats on every pair row of a county; zero out all but one row
// so that the sum taken in the collapse counts each county exactly once.
bysort ctyfips: replace pop_2010 = 0 if _n > 1

// ---- State EITC rates of the FRIEND's state ----
// The rates file is keyed on stfips, so the friend's state borrows that name
// for the merge while the existing stfips is parked in o_stfips.
rename stfips o_stfips
rename friend_stfips stfips
merge m:1 stfips using state_eitcrateswide1985_2018, nogenerate
rename stfips f_stfips
rename o_stfips stfips

// anyYYYY = 1 when yrYYYY is not system-missing; fblinks_anyYYYY keeps the
// links of the pairs for which that indicator is 1.
forvalues y = 1985/2018 {
	generate any`y' = (yr`y' != .)
	generate fblinks_any`y' = any`y'*fb_links
}

// ---- Collapse to one row per DMA (dma_id is the only by-variable) ----
collapse (sum) fblinks_any* pop_2010, by(dma_id)
drop fblinks_any2018

// Wide to long: one row per DMA and year
reshape long fblinks_any, i(dma_id) j(year)

// Links to friends in states with a state EITC, per 2010 resident
generate eitcexp_fblinkspp = (fblinks_any/pop_2010)
save dma_fbnetwork, replace
